knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Load libraries
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Immucan_lung/training/full"
## Input data
data.path <- "/mnt/bb_dqbm_volume/data/Immucan_lung"
# Set path to masks
masks.path <- file.path(data.path, "masks")
# Set path to random forest classifier
rf.path <- file.path(data.path, "rffit_v7_all_markers.rds")
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
## Read results
# Read in all results for tonsil and lung comparison into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("normalized", results.files)]
results.df <- lapply(results.files, read_results, type="res", voi="norm") %>%
bind_rows() %>%
mutate(norm=ifelse(grepl("combi", norm), "combi", norm))
## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("normalized", X.files)]
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, if spillover correction was used
# and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="norm")
X.list <- lapply(X.list, function(sce.temp){
if (grepl("combi", metadata(sce.temp)$norm)){
metadata(sce.temp)$norm <- "combi"
}
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
# Read in masks for lung data
masks <- loadImages(masks.path, as.is = TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))
## Read random forest classifier
# Read RF classifier for the lung dataset
rffit.lung <- readRDS(rf.path)
For each different measurement used to test training results, a barplot is shown comparing spillover corrected and uncorrected results for Tonsil th152 data.
# For each different measurement of training results, plot barplot comparing
# spillover corrected and uncorrected results
# Melt dataframe for plotting
data_temp <- results.df %>%
dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
pivot_longer(!c("norm", "simulation"), names_to="measure", values_to="value")
# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "norm")
print(results.barplot)
The following plots show scatterplots of different proteins of interest where cells are coloured according to celltypes of interest for Immucan lung dataset once trained on normalized and once trained on unnormalized datasets.
names(X.list) <- lapply(X.list, function(x){metadata(x)$ground_truth})
X.list <- append(X.list[-c(1:2)], X.list[1:2])
# List of combinations of proteins of interest
poi <- list(c("CD20", "CD3", "B"), c("VISTA", "CD3", ""))
for (p_vec in poi) {
cat("###", p_vec[1], "vs", p_vec[2], "\n")
# Plot asinh transformed counts of proteins of interest of decomposed data trained
# with and without normalization
X.spillover <- plot_grid(plot_exprs(X.list[1:2], p_vec[3], p_vec[1], p_vec[2]),
plot_exprs(X.list[3:4], p_vec[3], p_vec[1], p_vec[2]),
plot_exprs(X.list[5:6], p_vec[3], p_vec[1], p_vec[2]),
ncol=1, labels=unique(unlist(lapply(X.list, function(sce){
metadata(sce)$norm}))),
label_size=15, hjust=c(-4.5, -3, -7.5), vjust=0.9, scale = 0.9)
print(X.spillover)
cat("\n\n")
}
Next, we use a pre-trained random forest classifier to predict celltypes of the decomposed Immucan lung dataset using the arcsinh transformed decomposed counts. We then compare these results to the ‘ground truth’ celltypes (celltypes predicted from ground truth data) by calculating the precision, sensitivity and specififity.
# TODO: test this
# Predict cell type using pretrained classifier on decomposed results and
# compute confusion matrix using celltypes of true data
lungSim.exprs <- lapply(X.list, compute_confusionMatrix, rffit=rffit.lung)
# Plot prediction results/confusion matrix as shown in IMCDataAnalysis script
i <- 1
for (cm in lungSim.exprs){
cat("###", names(lungSim.exprs)[i], ",", metadata(X.list[[i]])$norm, "\n")
pred.plot <- data.frame(cm$byClass) %>%
mutate(class=sub("Class: ", "", rownames(cm$byClass))) %>%
ggplot() +
geom_point(aes(1 - Specificity, Sensitivity,
size=Detection.Rate,
fill=class),
shape=21) +
scale_fill_igv() +
theme_cowplot(10) +
ylab("Sensitivity (TPR)") +
xlab("1 - Specificity (FPR)")
print(pred.plot)
cat("\n\n")
}